# Toy directed graph with seven nodes and four edges, drawn at hand-picked
# coordinates so that the layout is reproducible.
example_graph = nx.DiGraph()
example_graph.add_nodes_from([1, 2, 3, 4, 5, 6, 7])
example_graph.add_edges_from([(3, 2), (4, 2), (6, 5), (7, 6)])
matplotlib.style.use(SBSTYLE)
# The keyword is `nodelist` (no underscore): `node_list` is not part of the
# draw_networkx API, so the node order that node_color relies on was never
# actually passed on (and recent networkx releases reject unknown keywords).
nx.draw_networkx(
    example_graph,
    pos={1: (0, 0), 2: (0, 1), 3: (1, 1), 4: (1, 1.5),
         5: (0, 2), 6: (1, 2), 7: (2, 2)},
    nodelist=[1, 2, 3, 4, 5, 6, 7],
    node_color=[1, 2, 1, 3, 2, 1, 2],
)
limits = plt.axis("off")
# Second toy graph: three named nodes connected by weighted, directed edges.
example_graph2 = nx.DiGraph()
example_graph2.add_nodes_from(['Alice', 'Bob', 'Carol'])
example_graph2.add_weighted_edges_from(
    [('Alice', 'Bob', 2), ('Bob', 'Alice', 1), ('Carol', 'Bob', 1)]
)
matplotlib.style.use(SBSTYLE)
# `nodelist` is the keyword draw_networkx understands; the former
# `node_list` spelling is not part of its API.
nx.draw_networkx(
    example_graph2,
    nodelist=['Alice', 'Bob', 'Carol'],
    node_color=[1, 3, 2],
    node_size=1000,
)
limits = plt.axis("off")
from matplotlib.ticker import FuncFormatter
# Labels "Polymath 1" ... "Polymath 11", used as the index of every series below.
PROJECTS_TO_C = ["Polymath {}".format(number) for number in range(1, 12)]

# Last accumulated author value recorded for each project.
PARTICIPANTS = Series(
    [PM_FRAME.loc[name]['authors (accumulated)'].iloc[-1] for name in PROJECTS_TO_C],
    index=PROJECTS_TO_C,
)

# Last non-missing research network of each project.
R_NETWORKS = Series(
    [PM_FRAME.loc[name]['r_network'].dropna().iloc[-1] for name in PROJECTS_TO_C],
    index=PROJECTS_TO_C,
)

# Projects for which the 'research' column is not True throughout;
# only these get a discussion network.
WITH_D = [name for name in PROJECTS_TO_C if not PM_FRAME.loc[name]['research'].all()]
D_NETWORKS = Series(
    [PM_FRAME.loc[name]['d_network'].dropna().iloc[-1] for name in WITH_D],
    index=WITH_D,
)

# Index labels of each network's author_frame, collected into a set.
R_PARTICIPANTS = R_NETWORKS.apply(lambda net: set(net.author_frame.index))
D_PARTICIPANTS = D_NETWORKS.apply(lambda net: set(net.author_frame.index))

# Last accumulated comment count recorded for each project.
COMMENTS = Series(
    [PM_FRAME.loc[name]['number of comments (accumulated)'].iloc[-1]
     for name in PROJECTS_TO_C],
    index=PROJECTS_TO_C,
)
# One row per project; each cell holds a collection of authors (or NaN where a
# project has no entry in D_PARTICIPANTS).
df = DataFrame({'all threads': PARTICIPANTS,
                'research threads': R_PARTICIPANTS,
                'discussion threads': D_PARTICIPANTS},
               index=PROJECTS_TO_C)
# Element-wise differences between the per-project author collections.
df['authors only active in research threads'] = df['research threads'] - df['discussion threads']
df['authors only active in "discussion" threads'] = df['discussion threads'] - df['research threads']
df['authors active in both types of threads'] = (
    df['all threads']
    - df['authors only active in research threads']
    - df['authors only active in "discussion" threads']
)
# Where the research-only cell is still missing, fall back to the project's
# complete author collection. The cell is written with a single `.at` access:
# the former chained form `df.loc[project][col] = ...` assigns into a
# temporary row object and is not guaranteed to reach `df`.
for project in PROJECTS_TO_C:
    if pd.isnull(df.at[project, 'authors only active in research threads']):
        df.at[project, 'authors only active in research threads'] = df.at[project, 'all threads']
data = df[['authors only active in research threads',
           'authors only active in "discussion" threads',
           'authors active in both types of threads']]
# Replace every collection by its size; missing cells count as 0.
# (The lambda parameter no longer shadows the builtin `set`.)
data = data.applymap(lambda members: len(members) if pd.notnull(members) else 0)
matplotlib.style.use(SBSTYLE)
# Stacked bar chart of the three participant counts held in `data`.
axes = data.plot(
    kind='bar',
    stacked=True,
    color=['steelblue', 'lightsteelblue', 'lightgrey'],
    title="Number of participants per thread-type in each Polymath project\n Number of comments per project",
)
axes.set_ylabel("Number of participants")
# Hand-placed arrows: (label, arrow tip, label position, arrow colour).
for note, tip, label_at, arrow_colour in [
        ('published', (0, 115), (0, 130), 'steelblue'),
        ('published', (3, 60), (1.5, 80), 'steelblue'),
        ('re-used', (4, 130), (4.5, 140), 'lightsteelblue'),
        ('published', (7, 155), (7.5, 170), 'steelblue'),
]:
    axes.annotate(note, xy=tip, xytext=label_at,
                  arrowprops={'facecolor': arrow_colour, 'shrink': 0.05})
# Comment counts go on a twin y-axis. The values are plotted as square roots
# and the tick formatter squares them again, so the labels show raw counts.
data2 = np.sqrt(COMMENTS)
axes2 = axes.twinx()
axes2.yaxis.set_major_formatter(
    FuncFormatter(lambda tick, _pos: "{:0.0f}".format(np.square(tick)))
)
axes2.set_ylabel("Number of comments")
axes2.plot(axes.get_xticks(), data2.values,
           color='darkgrey', linewidth=.5, marker='.', linestyle='-')
[<matplotlib.lines.Line2D at 0x1c7aa8ef0>]
# Call the plot_community_evolution helper (defined outside this excerpt) for
# "Polymaths"; the recorded output on the next line shows a matplotlib Figure.
plot_community_evolution("Polymaths")
<matplotlib.figure.Figure at 0x1c74cf2e8>
# Call plot_participation_evolution (defined outside this excerpt) with n=2 and
# keep its result; further down it is iterated with .items() as
# author -> flag pairs to pick the authors whose flag is truthy.
select_n = plot_participation_evolution("Polymath", n=2)
(threshold: participation in at least two projects)
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Alphabetically sorted names of the authors whose flag in select_n is truthy.
# The loop variable is named `selected` so it no longer shadows the builtin `bool`.
authors_n = sorted(author for author, selected in select_n.items() if selected)
def general_heatmap(authors=None, binary=False, thread_level=True,
                    binary_method='average', method='ward', log=True,
                    fontsize=8):
    """Draw a clustered author-by-row heatmap of comment activity.

    Rows come from ``PM_FRAME['comment_counter']`` when ``thread_level`` is
    true, otherwise from the accumulated comment counter returned by
    ``get_last(POLYMATHS)``. Columns are authors. Rows and columns are both
    reordered by the leaf order of a hierarchical clustering.

    Parameters
    ----------
    authors : list, optional
        Authors to show as columns. Only used when ``thread_level`` is
        false; an empty value or ``None`` falls back to ``ALL_AUTHORS``.
        The list passed in is not modified.
    binary : bool
        If true, cluster and plot presence/absence (Hamming metric, linkage
        method ``binary_method``) instead of comment counts (Euclidean
        metric, linkage method ``method``).
    thread_level : bool
        Selects the data source (see above). When true, ``authors`` is
        ignored and all of ``ALL_AUTHORS`` is used.
    binary_method, method : str
        Linkage methods for the binary and the count case respectively.
    log : bool
        Use a logarithmic colour normalisation.
    fontsize : int
        Font size of the tick labels.

    Prints the cophenetic correlation coefficient of the author clustering
    and returns ``None``.
    """
    if thread_level:
        authors_filtered = list(ALL_AUTHORS)
        data = PM_FRAME['comment_counter']
    else:
        # Copy, so that dropping "Anonymous" below never alters the list
        # owned by the caller.
        authors_filtered = list(authors) if authors else list(ALL_AUTHORS)
        data = get_last(POLYMATHS)[0]['comment_counter (accumulated)']
    # Explicit membership test instead of a bare ``except: pass``.
    if "Anonymous" in authors_filtered:
        authors_filtered.remove("Anonymous")

    if binary:
        as_matrix = np.array([[author in data[thread] for author in authors_filtered]
                              for thread in data.index])
        used_method, metric = binary_method, 'hamming'
    else:
        as_matrix = np.array([[data.loc[thread][author] for author in authors_filtered]
                              for thread in data.index])
        used_method, metric = method, 'euclidean'
    Z_author = linkage(as_matrix.T, method=used_method, metric=metric)
    Z_thread = linkage(as_matrix, method=used_method, metric=metric)
    # The cophenetic correlation has to be measured against the distances
    # the linkage was built from, hence the same metric is passed to pdist
    # (previously Euclidean distances were used even in the Hamming case).
    c, _ = cophenet(Z_author, pdist(as_matrix.T, metric=metric))
    print("Cophenetic Correlation Coefficient with {}: {}".format(used_method, c))

    # start setting up plots
    matplotlib.style.use(SBSTYLE)
    fig, ax_heatmap = plt.subplots()

    # dendrograms are computed only for their leaf order (nothing is drawn)
    ddata_author = dendrogram(Z_author, color_threshold=.07, no_plot=True)
    ddata_thread = dendrogram(Z_thread, color_threshold=.07, no_plot=True)
    df = DataFrame(as_matrix, columns=authors_filtered)
    cols = [authors_filtered[i] for i in ddata_author['leaves']]
    df = df[cols]
    rows = [df.index[i] for i in ddata_thread['leaves']]
    df = df.reindex(rows)

    # plot heatmap
    heatmap = ax_heatmap.pcolor(df,
                                edgecolors='w',
                                cmap=mpl.cm.binary if binary else mpl.cm.GnBu,
                                norm=mpl.colors.LogNorm() if log else None)
    ax_heatmap.autoscale(tight=True)  # get rid of whitespace in margins of heatmap
    ax_heatmap.set_aspect('equal')  # ensure heatmap cells are square
    ax_heatmap.xaxis.set_ticks_position('bottom')  # put column labels at the bottom
    # Booleans, not the legacy 'off' strings: a non-empty string is truthy,
    # so 'off' switches the ticks *on* in current matplotlib.
    ax_heatmap.tick_params(bottom=False, top=False, left=False, right=False)
    ax_heatmap.set_title("Project-Engagement in Polymath")
    ax_heatmap.set_yticks(np.arange(0.5, len(df.index)+.5, 1))
    ax_heatmap.set_yticklabels(df.index + 1, fontsize=fontsize)
    ax_heatmap.set_xticks(np.arange(len(df.columns)) + 0.5)
    ax_heatmap.set_xticklabels(df.columns, rotation=90, fontsize=fontsize)
    if not binary:
        # colour bar in a narrow axis to the right of the heatmap
        divider_h = make_axes_locatable(ax_heatmap)
        cax = divider_h.append_axes("right", "3%", pad="1%")
        plt.colorbar(heatmap, cax=cax)
    lines = (ax_heatmap.xaxis.get_ticklines() +
             ax_heatmap.yaxis.get_ticklines())
    plt.setp(lines, visible=False)
    plt.tight_layout()
# Count-based (binary=False) heatmap on the non-thread-level data, restricted
# to the authors in authors_n and drawn with a logarithmic colour scale.
general_heatmap(authors=authors_n, thread_level=False,
binary=False, log=True)
Cophenetic Correlation Coefficient with ward: 0.9424851136308227
Note: Zollman's model could be seen as private announcements/observations only.
# Per-project figures produced by helpers defined outside this excerpt:
# two heatmaps (Polymath 4 with cluster_threads=True, Polymath 1 with
# cluster_threads=False), followed by the Polymath 4 "interaction" graph.
project_heatmap("Polymath 4", cluster_threads=True, method='average', log=True, fontsize=10)
project_heatmap("Polymath 1", cluster_threads=False, method='average', log=True, fontsize=9)
# NOTE(review): the effect of reset=True is not visible here — confirm in draw_network.
draw_network("Polymath 4", graph_type="interaction", reset=True)
import io
import base64
from IPython.display import HTML
# Embed FIGS/out.m4v in the notebook as a base64 data URI inside a <video> tag.
# The file is only read, so it is opened read-only ('rb' instead of 'r+b'),
# and the context manager guarantees the handle is closed again.
with io.open('FIGS/out.m4v', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" controls>
<source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))
MeanShift algorithm to identify clusters in lists of time-stamps.
plot_activity("Polymath 4", color_by="author", first="2009-07-01", last="2009-08-15")
# Draw the Polymath 4 graph twice via draw_network (defined outside this
# excerpt): once with graph_type="cluster", once with graph_type="interaction".
draw_network("Polymath 4", graph_type="cluster")
draw_network("Polymath 4", graph_type="interaction")
# Report the density of two graphs with networkx.
# NOTE(review): pm4_c and pm4_i are not defined in this excerpt — presumably the
# co-location and interaction graphs of Polymath 4; confirm where they are set.
print("Density of co-location network: ", nx.density(pm4_c))
print("Density of interaction network: ", nx.density(pm4_i))
Density of co-location network: 0.18506493506493507 Density of interaction network: 0.05357142857142857
#TODO: use sizes as node_size
def make_network(ser, network, edge_list):
    """Fill ``network`` with weighted edges and tabulate the weights.

    Parameters
    ----------
    ser : pandas.Series
        Only its index is used: the labels become the nodes of the network
        and both axes of the returned table.
    network : graph object
        Anything offering ``add_nodes_from`` and ``add_weighted_edges_from``;
        it is modified in place and returned.
    edge_list : iterable of (i, j, weight)
        Weighted edges between labels of ``ser.index``.

    Returns
    -------
    (network, DataFrame)
        The table holds the weight of edge ``(i, j)`` in column ``i``,
        row ``j``; every other cell is 0.
    """
    edge_list = list(edge_list)  # iterated twice below
    df = DataFrame(index=ser.index, columns=ser.index)
    for i, j, weight in edge_list:
        # Single-step cell assignment. The former chained form
        # ``df[i][j] = ...`` writes through an intermediate column object
        # and is not guaranteed to reach ``df``.
        df.at[j, i] = weight
    df = df.fillna(0)
    network.add_nodes_from(ser.index)
    network.add_weighted_edges_from(edge_list)
    return network, df
def digraph_from_series_of_sets(ser):
    """Build a directed overlap network from a Series of sets.

    For every ordered pair ``(i, j)`` of index labels the edge weight is
    ``len(ser[i] & ser[j]) / len(ser[i])``, i.e. the share of ``ser[i]`` that
    also occurs in ``ser[j]``. An empty ``ser[i]`` yields weight 0.0 instead
    of raising ZeroDivisionError.

    Returns whatever ``make_network`` returns for a fresh ``nx.DiGraph``.
    """
    def _share(i, j):
        own_size = len(ser[i])
        return len(ser[i] & ser[j]) / own_size if own_size else 0.0

    edge_list = [(i, j, _share(i, j)) for i, j in permutations(ser.index, 2)]
    return make_network(ser, nx.DiGraph(), edge_list)
def graph_from_series_of_sets(ser):
    """Build an undirected overlap network from a Series of sets.

    For every unordered pair ``(i, j)`` of index labels the edge weight is
    ``len(ser[i] & ser[j]) / len(ser[i] | ser[j])`` (intersection over
    union). When both sets are empty the weight is 0.0 instead of raising
    ZeroDivisionError.

    Returns whatever ``make_network`` returns for a fresh ``nx.Graph``.
    """
    def _overlap(i, j):
        union_size = len(ser[i] | ser[j])
        return len(ser[i] & ser[j]) / union_size if union_size else 0.0

    # NOTE: the unnormalised intersection size was tried as an alternative
    # weight and left disabled.
    edge_list = [(i, j, _overlap(i, j)) for i, j in combinations(ser.index, 2)]
    return make_network(ser, nx.Graph(), edge_list)
# Work on a copy of the 'authors' column so that relabelling leaves PM_FRAME alone.
thread_ser = PM_FRAME['authors'].copy()
# Collapse each two-part index entry into one "<project> <number>" label.
thread_ser.index = [" ".join((project, str(i)))
                    for project, i in thread_ser.index.values]
# Overlap network between the relabelled entries, drawn with default settings.
a_netw, a_df = graph_from_series_of_sets(thread_ser)
nx.draw_networkx(a_netw)
# Keep only the edges whose weight is at least .33 and rebuild the network as
# a DiGraph from them.
a_netw = nx.DiGraph([(u, v, d) for u, v, d in a_netw.edges(data=True) if d['weight'] >= .33])
# Materialised once so that weights, colours and the drawn edges share one order.
a_netw_edges = list(a_netw.edges())
a_netw_weights = [a_netw[source][dest]['weight'] for source, dest in a_netw_edges]
a_netw_colors = [plt.cm.Blues(weight*15) for weight in a_netw_weights]
# `edgelist` is the keyword draw_networkx understands; `edges=` is not part
# of its API.
nx.draw_networkx(a_netw, edgelist=a_netw_edges, width=1, edge_color=a_netw_colors)
# List the members of every weakly connected component that has more than one
# node, in sorted order, with an empty line after each component.
for component in nx.weakly_connected_components(a_netw):
    if len(component) <= 1:
        continue
    for project in sorted(component):
        print(project)
    print()
Polymath 1 1 Polymath 1 11 Polymath 1 12 Polymath 1 13 Polymath 1 15 Polymath 1 16 Polymath 1 2 Polymath 1 3 Polymath 1 4 Polymath 1 5 Polymath 1 8 Polymath 1 9 Polymath 10 0 Polymath 10 1 Polymath 10 2 Polymath 10 3 Polymath 10 4 Polymath 11 0 Polymath 11 1 Polymath 11 2 Polymath 11 3 Polymath 11 4 Polymath 3 0 Polymath 3 1 Polymath 3 11 Polymath 3 2 Polymath 3 3 Polymath 3 4 Polymath 3 5 Polymath 3 7 Polymath 3 8 Polymath 3 9 Polymath 4 0 Polymath 4 1 Polymath 4 2 Polymath 4 3 Polymath 4 5 Polymath 4 6 Polymath 5 0 Polymath 5 1 Polymath 5 10 Polymath 5 11 Polymath 5 12 Polymath 5 13 Polymath 5 14 Polymath 5 15 Polymath 5 16 Polymath 5 17 Polymath 5 18 Polymath 5 19 Polymath 5 2 Polymath 5 20 Polymath 5 21 Polymath 5 22 Polymath 5 23 Polymath 5 27 Polymath 5 28 Polymath 5 3 Polymath 5 31 Polymath 5 33 Polymath 5 4 Polymath 5 5 Polymath 5 6 Polymath 5 7 Polymath 5 9 Polymath 6 1 Polymath 9 2 Polymath 7 0 Polymath 7 1 Polymath 7 2 Polymath 7 3 Polymath 7 4 Polymath 7 5 Polymath 7 6 Polymath 8 12 Polymath 8 13 Polymath 8 4
# Overlap network between the accumulated author collections of the projects.
project_ser = get_last(POLYMATHS)[0]['authors (accumulated)']
project_network, project_matrix = graph_from_series_of_sets(project_ser)
# Node order and node sizes (number of authors per project) in matching order.
project_nodes, project_sizes = project_ser.index.tolist(), project_ser.apply(len).tolist()
# Materialised once so that weights, colours and the drawn edges share one order.
project_edges = list(project_network.edges())
project_weights = [project_network[source][dest]['weight'] for source, dest in project_edges]
project_colors = [plt.cm.Blues(weight*15) for weight in project_weights]
# `edgelist` is the keyword draw_networkx understands; `edges=` is not part
# of its API.
nx.draw_networkx(project_network, nodelist=project_nodes, node_size=project_sizes,
                 edgelist=project_edges, width=1, edge_color=project_colors)
project_matrix
| Project | Polymath 1 | Polymath 2 | Polymath 3 | Polymath 4 | Polymath 5 | Polymath 6 | Polymath 7 | Polymath 8 | Polymath 9 | Polymath 10 | Polymath 11 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Project | |||||||||||
| Polymath 1 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 2 | 0.027273 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 3 | 0.062500 | 0.018868 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 4 | 0.140845 | 0.032787 | 0.095745 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 5 | 0.127389 | 0.026316 | 0.082569 | 0.114035 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 6 | 0.062500 | 0.052632 | 0.090909 | 0.078125 | 0.090909 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 7 | 0.031496 | 0.000000 | 0.043478 | 0.051948 | 0.032258 | 0.055556 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 8 | 0.055351 | 0.005376 | 0.027149 | 0.063063 | 0.050209 | 0.026596 | 0.030151 | 0.000000 | 0.000000 | 0.000000 | 0 |
| Polymath 9 | 0.049587 | 0.037037 | 0.079365 | 0.084507 | 0.069767 | 0.096774 | 0.095238 | 0.036082 | 0.000000 | 0.000000 | 0 |
| Polymath 10 | 0.037313 | 0.025641 | 0.081081 | 0.085366 | 0.050505 | 0.121951 | 0.054545 | 0.028986 | 0.102041 | 0.000000 | 0 |
| Polymath 11 | 0.027586 | 0.063830 | 0.046512 | 0.053191 | 0.055556 | 0.056604 | 0.030303 | 0.032407 | 0.103448 | 0.085714 | 0 |
# project_nodes is already a plain list (built with .tolist() above), and
# lists have no .tolist() method, so it is displayed as is.
project_nodes
['Polymath 1', 'Polymath 2', 'Polymath 3', 'Polymath 4', 'Polymath 5', 'Polymath 6', 'Polymath 7', 'Polymath 8', 'Polymath 9', 'Polymath 10', 'Polymath 11']
# Project network restricted to the edges whose weight is at least .075.
f_project_network = nx.Graph([(u, v, d) for u, v, d in project_network.edges(data=True) if d['weight'] >= .075])
# Materialised once so that weights, colours and the drawn edges share one order.
f_project_edges = list(f_project_network.edges())
f_project_weights = [f_project_network[source][dest]['weight'] for source, dest in f_project_edges]
f_project_colors = [plt.cm.Blues(weight*15) for weight in f_project_weights]
# `edgelist` is the keyword draw_networkx understands; `edges=` is not part
# of its API.
nx.draw_networkx(f_project_network, edgelist=f_project_edges, width=1, edge_color=f_project_colors)
# Scratch check of DataFrame.from_records: each of the two tuples becomes one
# row of a 2x3 frame (see the recorded output below).
DataFrame.from_records([(1,2,3), (2,1,4)])
| | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 1 | 2 | 3 |
| 1 | 2 | 1 | 4 |